#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Project: spd-sxmcc
"""
@author: lyndon
@time Created on 2018/11/28 17:05
@desc Scratch script: parse an HTML fragment with pyquery and print an anchor's href.
"""

from pyquery import PyQuery as pq

# html = '''<div class='content'>
#     <ul id = 'haha'>
#          <li class="item-0">first item</li>
#          <li class="item-1"><a href="link2.html">second item</a></li>
#          <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
#          <li class="item-1 active"><a href="link4.html">fourth item</a></li>
#          <li class="item-0"><a href="link5.html">fifth item</a></li>
#      </ul></div>'''
# Sample markup: an <h3> heading wrapping a single link whose text is written
# as numeric character references.
html = (
    '<h3 xmlns="http://www.w3.org/1999/xhtml" class="listTit">'
    '<a href="/xiaoqu/372074.html" target="_blank">'
    '&#29380;&#26449;&#34903;&#21335;&#30005;&#26426;&#21378;&#23487;&#33293;(CBS3)'
    '</a></h3>'
)

# Build the document using the "html" parser, then show it and its type.
doc = pq(html, parser="html")
print(doc)
print(type(doc))

# Select the <a> element, read its href attribute, and show the type and value.
anchor = doc('a')
a = anchor.attr('href')
print(type(a))
print('a:' + a)



# at = a.attr('href').items()
# print(at)
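# Note: class="item-0 active" is a single class attribute holding two class names, but in a
# pyquery selector a space between them would mean "the a tag under an active tag under item-0",
# so the space must be replaced with a dot (e.g. ".item-0.active").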
# a = doc("a")
# print(type(a))
# print(a)
# Two ways to get an attribute value
# print(item.attr.href)
# print(item.attr('href'))

#
# from pyquery import PyQuery as pyq
#
# html = '''<p xmlns="http://www.w3.org/1999/xhtml" class="listTit"><a href="/xiaoqu/358503.html">hello</a></p>'''
#
# doc = pyq(html)
# d1 = doc('a').attr('href')
#
# print(d1)

# aa = doc('h3')
# print('----------')
# print(aa)
# print('----------')

#
# d=pq("<p id='my_id'><a href='http://hello.com'>hello</a></p>")
# d1 = d('a').attr('href')  # returns http://hello.com
# d2 = d('p').attr('id')  # returns my_id
#
# print(d1)
# print(d2)